

https://upload.wikimedia.org/wikipedia/commons/1/10/JIE_Sankey_V5_Fig1.png

https://upload.wikimedia.org/wikipedia/commons/2/29/Minard.png

https://www.iea.org/sankey/#?c=World&s=Balance

https://ec.europa.eu/eurostat/web/energy/energy-flow-diagrams

https://www.tagesschau.de/inland/btw21/waehlerwanderung-bundestagswahl-103.html

https://www.tagesschau.de/inland/btw21/waehlerwanderung-bundestagswahl-103.html

https://www.tagesschau.de/inland/btw21/waehlerwanderung-bundestagswahl-103.html

https://www.economist.com/graphic-detail/2019/11/01/a-british-election-and-other-uncertainties

https://download.statistik-berlin-brandenburg.de/0c8e82331bc2327a/802f7f020114/SB_A01-03-00_2020j01_BE.xlsx
# Signed 2020 population figures for Berlin, keyed by flow name.
data = {
    '2020': {
        'start': 3669491,
        'births': 38693,
        'immigration': 142923,
        'deaths': -37642,
        'emmigration': -144881,
        'end': -3664088,
    },
}
# Split the mapping into two parallel lists: one label per flow value.
balance_2020 = data['2020']
labels = [name for name in balance_2020]
flows = [balance_2020[name] for name in labels]
flows, labels
([3669491, 38693, 142923, -37642, -144881, -3664088], ['start', 'births', 'immigration', 'deaths', 'emmigration', 'end'])
import matplotlib.pyplot as plt
from matplotlib.sankey import Sankey
# Step 1: bare diagram, Sankey() constructed without any arguments.
sankey = Sankey() # init
sankey.add(flows=flows, labels=labels) # add flow(s)
sankey.finish() # create
plt.show() # show
# Step 2: same diagram with an explicit scale. The raw flows are in the
# millions; 3669491 * 1e-7 is roughly 0.37. (Per the matplotlib docs, `scale`
# is the factor applied to the flows when sizing the paths.)
scale = 0.0000001
sankey = Sankey(scale=scale) # init with scale!
sankey.add(flows=flows, labels=labels)
sankey.finish()
plt.show()
# Step 3: choose the side each flow attaches to. `orientations` has one entry
# per flow, in the same order as `flows`/`labels`.
sankey = Sankey(scale=scale)
# 0 (inputs from the left, outputs to the right),
# 1 (from and to the top) or -1 (from and to the bottom).
orientations = [0, -1, 1, -1, 1, 0]
# add flow(s) with orientations
sankey.add(flows=flows, labels=labels, orientations=orientations)
sankey.finish()
plt.show()
# Step 4: additionally pass `pathlengths`, again one entry per flow
# (all six set to 0.1 here).
pathlengths=[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
sankey = Sankey(scale=scale)
sankey.add(
flows=flows, labels=labels,
orientations=orientations,
pathlengths=pathlengths,
) # add flow(s) with orientations and pathlengths
sankey.finish()
plt.show()
def format_number(n):
    """Return *n* as a string with comma thousands separators.

    Used as the ``format`` callable handed to ``Sankey`` so that the flow
    values are printed as e.g. ``3,669,491`` instead of ``3669491``.
    """
    return '{:,}'.format(n)  # add thousand separator
# add number format: format_number is passed as the `format` callable so the
# flow values are rendered with thousands separators
sankey = Sankey(scale=scale, format=format_number)
sankey.add(
flows=flows, labels=labels,
orientations=orientations,
pathlengths=pathlengths,
)
sankey.finish()
plt.show()
# Same diagram again, now with a fill colour and a figure title.
sankey = Sankey(scale=scale, format=format_number)
sankey.add(
flows=flows, labels=labels,
orientations=orientations,
pathlengths=pathlengths,
facecolor='lightgray' # change color
)
sankey.finish()
# The title is set through pyplot after finish() and before show().
plt.title("Berlin Census 2020") # add title
plt.show()
# add second year: the magnitude of 'end 2019' equals 'start 2020'
data = {
    '2019': {
        'start 2019': 3644826,
        'births': 39503,
        'immigration': 184744,
        'deaths': -34739,
        'emmigration': -161513,
        'end 2019': -3669491,
    },
    '2020': {
        'start 2020': 3669491,
        'births': 38693,
        'immigration': 142923,
        'deaths': -37642,
        'emmigration': -144881,
        'end 2020': -3664088,
    },
}
year_2019, year_2020 = data['2019'], data['2020']
flows_2019 = [*year_2019.values()]
# Keep every 2019 label except the last one, which is blanked out
# (remove last label).
labels_2019 = [*year_2019][:-1] + [None]
flows_2020 = [*year_2020.values()]
labels_2020 = [*year_2020]
# Two diagrams in one Sankey: 2019 first, then 2020 attached to it.
# Path lengths are redefined for this figure (one entry per flow).
pathlengths=[0.3, 0.3, 0.1, 0.1, 0.3, 0.3]
sankey = Sankey(scale=scale, format=format_number)
# First diagram (index 0): the 2019 flows.
sankey.add(
flows=flows_2019, labels=labels_2019,
orientations=orientations,
pathlengths=pathlengths,
facecolor='lightgray'
)
# Second diagram: the 2020 flows. prior=0 names the diagram added above;
# connect=(5, 0) pairs flow 5 of that diagram (-3669491, the 2019 end value)
# with flow 0 of this one (3669491, 'start 2020').
sankey.add(
flows=flows_2020, labels=labels_2020,
orientations=orientations,
pathlengths=pathlengths,
prior=0, connect=(5, 0), # connect second flow to first
facecolor='darkgray'
)
sankey.finish()
plt.title("Berlin Census 2019 & 2020") # add title
plt.show()
- scale argument for large/small numbers
- format difficult to handle
import pandas as pd
from pySankey.sankey import sankey
# create DataFrame from 2020 data: one row per link, as (source, target, value)
df_2020 = pd.DataFrame(
    [
        ('start', 'deaths', 37642),          # start -> deaths
        ('start', 'emmigration', 144881),    # start -> emmigration
        ('start', 'end', 3669491),           # start -> end
        ('births', 'end', 38693),            # births -> end
        ('immigration', 'end', 142923),      # immigration -> end
    ],
    columns=['source', 'target', 'value'],
)
df_2020
| | source | target | value |
|---|---|---|---|
| 0 | start | deaths | 37642 |
| 1 | start | emmigration | 144881 |
| 2 | start | end | 3669491 |
| 3 | births | end | 38693 |
| 4 | immigration | end | 142923 |
# Draw the 2020 links with pySankey's sankey(): the source column goes in as
# `left`, the target column as `right`, and the link values as `leftWeight`.
sankey(
left=df_2020['source'], right=df_2020['target'],
leftWeight=df_2020['value'],
fontsize=14,
#figure_name="Berlin Census 2020", # used for saving png, not title
)
- figure_name is not in the docstring: it is used for saving the file (not as the title)
from psankey.sankey import sankey
# psankey's sankey() is given the link DataFrame directly; its three return
# values are unpacked as nodes, fig and ax.
nodes, fig, ax = sankey(
df_2020, aspect_ratio=4/3,
nodelabels=True, linklabels=True, labelsize=5,
)
# The title is added through pyplot after the diagram has been drawn.
plt.title("Berlin Census 2020") # add title
plt.show()
- pd.DataFrames
- README.md
- nodemodifier to highlight nodes
import holoviews as hv
from holoviews import opts, dim
# Select the bokeh plotting backend for HoloViews.
hv.extension('bokeh')
# Figure size shared by the HoloViews and plotly figures that follow.
width, height = 600, 400
# run example code: each inner list is one link given as [source, target, value]
sankey = hv.Sankey([
['A', 'X', 5], ['A', 'Y', 7], ['A', 'Z', 6],
['B', 'X', 2], ['B', 'Y', 9], ['B', 'Z', 4]
])
sankey.opts(width=width, height=height)
# pass DataFrame from previous example (columns: source, target, value)
sankey = hv.Sankey(df_2020)
sankey.opts(width=width, height=height)
# create DataFrame from 2019 & 2020 data: one row per link, given as
# (source, target, value, color)
df = pd.DataFrame(
    [
        # 2019
        ('2019', '2020', 3644826, 'lightgray'),
        ('2019', 'deaths `19', 34739, '#a6cee3'),
        ('2019', 'emmigration `19', 161513, '#1f78b4'),
        ('births `19', '2020', 39503, '#b2df8a'),
        ('immigration `19', '2020', 184744, '#33a02c'),
        # 2020
        ('2020', '2021', 3669491, 'lightgray'),
        ('2020', 'deaths `20', 37642, '#a6cee3'),
        ('2020', 'emmigration `20', 144881, '#1f78b4'),
        ('births `20', '2021', 38693, '#b2df8a'),
        ('immigration `20', '2021', 142923, '#33a02c'),
    ],
    columns=['source', 'target', 'value', 'color'],
)
df.head(3)
| | source | target | value | color |
|---|---|---|---|---|
| 0 | 2019 | 2020 | 3644826 | lightgray |
| 1 | 2019 | deaths `19 | 34739 | #a6cee3 |
| 2 | 2019 | emmigration `19 | 161513 | #1f78b4 |
# Two-year diagram built from the link table in df.
sankey = hv.Sankey(df)
sankey.opts(
    width=width, height=height, cmap='Set2',
    # Edges are coloured by a column of the link table.
    edge_color=dim('source').str(),
    # Nodes are coloured by their own 'index' dimension, following the
    # HoloViews Sankey reference example; 'target' is a column of the link
    # table, not of the node table.
    # NOTE(review): confirm against the installed HoloViews version.
    node_color=dim('index').str(),
)
import plotly.graph_objects as go
# example from https://plotly.com/python/sankey-diagram/
# Nodes are described by parallel attribute lists; links refer to nodes by
# their position in `label`.
fig = go.Figure(data=[go.Sankey(
node = dict(
pad = 15,
thickness = 20,
line = dict(color="black", width=0.5),
label = ["A1", "A2", "B1", "B2", "C1", "C2"],
color = "blue"
),
link = dict(
# indices correspond to labels, eg A1, A2, A1, B1, ...
# link i runs from label[source[i]] to label[target[i]] with size value[i]
source = [0, 1, 0, 2, 3, 3],
target = [2, 3, 3, 4, 4, 5],
value = [8, 4, 2, 8, 4, 2]
))])
# width/height reuse the figure size defined for the HoloViews plots.
fig.update_layout(
title_text="Basic Sankey Diagram",
width=width, height=height, font_size=10)
fig.show()
# Give every distinct node name (whether it occurs as a source or as a target)
# an integer id, stored as a Series indexed by the name.
# Approach from https://stackoverflow.com/a/69464558
import numpy as np
node_names = np.unique(df[["source", "target"]].to_numpy().ravel())
nodes = pd.Series(data=range(len(node_names)), index=node_names)
nodes
2019 0 2020 1 2021 2 births `19 3 births `20 4 deaths `19 5 deaths `20 6 emmigration `19 7 emmigration `20 8 immigration `19 9 immigration `20 10 dtype: int64
# Two-year diagram with plotly. Node labels come from the index of `nodes`;
# each link's source/target name is translated to its integer id via
# nodes.loc[...].
fig = go.Figure(
    data=[
        go.Sankey(
            node={
                "label": nodes.index,
            },
            link={
                "source": nodes.loc[df["source"]],
                "target": nodes.loc[df["target"]],
                "value": df["value"],
            },
        )
    ]
)
fig.update_layout(
    title_text="Berlin Census 2019 & 2020",
    width=width, height=height, font_size=10)
fig.show()
# One (x, y, color) triple per node, listed in the same order as the entries
# of `nodes`.
node_layout = [
    # years
    (0.1, 0.5, "darkgray"),
    (0.4, 0.5, "darkgray"),
    (0.7, 0.5, "darkgray"),
    # births (light green)
    (0.1, 0.75, "#b2df8a"),
    (0.4, 0.8, "#b2df8a"),
    # deaths (light blue)
    (0.3, 0.2, "#a6cee3"),
    (0.6, 0.25, "#a6cee3"),
    # emmigration (dark blue)
    (0.3, 0.25, "#1f78b4"),
    (0.6, 0.3, "#1f78b4"),
    # immigration (dark green)
    (0.1, 0.7, "#33a02c"),
    (0.4, 0.75, "#33a02c"),
]
# Transpose the triples into the three parallel lists used for the figure.
x, y, color = (list(column) for column in zip(*node_layout))
x, y
([0.1, 0.4, 0.7, 0.1, 0.4, 0.3, 0.6, 0.3, 0.6, 0.1, 0.4], [0.5, 0.5, 0.5, 0.75, 0.8, 0.2, 0.25, 0.25, 0.3, 0.7, 0.75])
# Two-year diagram with manually placed, individually coloured nodes.
# x, y and color hold one entry per node; link colours come from the
# 'color' column of df.
fig = go.Figure(
    data=[
        go.Sankey(
            arrangement="freeform",
            node={
                "label": nodes.index,
                "x": x,
                "y": y,
                "pad": 100,  # padding between nodes
                "color": color,
            },
            link={
                "source": nodes.loc[df["source"]],
                "target": nodes.loc[df["target"]],
                "value": df["value"],
                "color": df["color"],
            },
        )
    ]
)
fig.update_layout(
    title_text="Berlin Census 2019 & 2020", font_size=10
)
fig.show()

https://www.ipoint-systems.com/blog/from-data-to-knowledge-the-power-of-elegant-sankey-diagrams/